Time series of monthly maximum rainfall [mm] in a day
library(xts)
library(zoo)
library(tseries)
library(copula)
setwd("/home/davide/universitĂ /tesi magistrale/dati/arpa")
load("only_rain.RData")
print(coordinates)
## station alt lat long
## 1 Brugnera 22 45.91792 12.54500
## 2 Capriva.del.Friuli 85 45.95809 13.51233
## 3 Cervignano.del.Friuli 8 45.84949 13.33701
## 4 Cividale.del.Friuli 127 46.08044 13.42001
## 5 Codroipo 37 45.95236 13.00274
## 6 Enemonzo 438 46.41042 12.86254
## 7 Fagagna 148 46.10169 13.07389
## 8 Fossalon 0 45.71477 13.45886
## 9 Gemona.del.Friuli 184 46.26130 13.12209
## 10 Gradisca.d.Is. 29 45.88979 13.48181
## 11 Musi 600 46.31266 13.27468
## 12 Palazzolo.dello.Stella 5 45.80572 13.05260
## 13 San.Vito.al.Tgl. 21 45.89566 12.81499
## 14 Sgonico.Zgonik 268 45.73800 13.74206
## 15 Talmassons 16 45.88231 13.15779
## 16 Tarvisio 794 46.51078 13.55189
## 17 Udine.S.O. 91 46.03521 13.22667
## 18 Vivaro 142 46.07653 12.76881
knitr::include_graphics("mappa_stazioni_rain.png")
#in separate plots
trace(plot.zoo,
quote(mtext <- function(...) graphics::mtext(..., cex = 0.5, las = 1)))
## [1] "plot.zoo"
plot.zoo(clean_rain_xts, oma = c(6, 5, 5, 0))
## Tracing plot.zoo(clean_rain_xts, oma = c(6, 5, 5, 0)) on entry
untrace(plot.zoo)
Acf plots and ljung box test
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (x in colnames(clean_rain_xts)){
acf(ts(clean_rain_xts[,x]), main=x) #plot of the ACF
}
#round function not working
for (x in colnames(clean_rain_xts)){
print(paste0("p-value of Ljung-Box test on rain in ", x , " is:", Box.test(clean_rain_xts[,x], lag = 20, type = "Ljung-Box")$p.value))
}
## [1] "p-value of Ljung-Box test on rain in Brugnera is:0.131982372909309"
## [1] "p-value of Ljung-Box test on rain in Capriva.del.Friuli is:0.342420393409757"
## [1] "p-value of Ljung-Box test on rain in Cervignano.del.Friuli is:0.764437631764927"
## [1] "p-value of Ljung-Box test on rain in Cividale.del.Friuli is:0.298603616354461"
## [1] "p-value of Ljung-Box test on rain in Codroipo is:0.194210840006664"
## [1] "p-value of Ljung-Box test on rain in Enemonzo is:0.0252374442208957"
## [1] "p-value of Ljung-Box test on rain in Fagagna is:0.406266394402289"
## [1] "p-value of Ljung-Box test on rain in Fossalon is:0.325963162941074"
## [1] "p-value of Ljung-Box test on rain in Gemona.del.Friuli is:0.023805779326967"
## [1] "p-value of Ljung-Box test on rain in Gradisca.d.Is. is:0.280495475660849"
## [1] "p-value of Ljung-Box test on rain in Musi is:0.00475603287750948"
## [1] "p-value of Ljung-Box test on rain in Palazzolo.dello.Stella is:0.934749836668725"
## [1] "p-value of Ljung-Box test on rain in San.Vito.al.Tgl. is:0.868452009714319"
## [1] "p-value of Ljung-Box test on rain in Sgonico.Zgonik is:0.840696071596907"
## [1] "p-value of Ljung-Box test on rain in Talmassons is:0.909658489009452"
## [1] "p-value of Ljung-Box test on rain in Tarvisio is:2.14358588460639e-05"
## [1] "p-value of Ljung-Box test on rain in Udine.S.O. is:0.253189905807489"
## [1] "p-value of Ljung-Box test on rain in Vivaro is:0.335901375480045"
for (x in colnames(clean_rain_xts)){
print(paste0("p-value of Ljung-Box test on squared rain in ", x , " is:", Box.test(clean_rain_xts[, x]^2, lag = 20, type = "Ljung-Box")$p.value))
}
## [1] "p-value of Ljung-Box test on squared rain in Brugnera is:0.163294547971732"
## [1] "p-value of Ljung-Box test on squared rain in Capriva.del.Friuli is:0.841589296245896"
## [1] "p-value of Ljung-Box test on squared rain in Cervignano.del.Friuli is:0.445652499691442"
## [1] "p-value of Ljung-Box test on squared rain in Cividale.del.Friuli is:0.926739822293949"
## [1] "p-value of Ljung-Box test on squared rain in Codroipo is:0.256656863284502"
## [1] "p-value of Ljung-Box test on squared rain in Enemonzo is:0.0704492125027978"
## [1] "p-value of Ljung-Box test on squared rain in Fagagna is:0.564285551650741"
## [1] "p-value of Ljung-Box test on squared rain in Fossalon is:0.194653546913393"
## [1] "p-value of Ljung-Box test on squared rain in Gemona.del.Friuli is:0.0215392068379517"
## [1] "p-value of Ljung-Box test on squared rain in Gradisca.d.Is. is:0.29739985117221"
## [1] "p-value of Ljung-Box test on squared rain in Musi is:0.0124915872643815"
## [1] "p-value of Ljung-Box test on squared rain in Palazzolo.dello.Stella is:0.799616200060337"
## [1] "p-value of Ljung-Box test on squared rain in San.Vito.al.Tgl. is:0.986513470431944"
## [1] "p-value of Ljung-Box test on squared rain in Sgonico.Zgonik is:0.67278247569016"
## [1] "p-value of Ljung-Box test on squared rain in Talmassons is:0.793492215144275"
## [1] "p-value of Ljung-Box test on squared rain in Tarvisio is:1.09083383346142e-05"
## [1] "p-value of Ljung-Box test on squared rain in Udine.S.O. is:0.0843148611886035"
## [1] "p-value of Ljung-Box test on squared rain in Vivaro is:0.333677758332719"
Slight seasonality effect on some stations (Tarvisio, Gemona, Enemonzo,… ), seasonally adjusted in R file, see the R file for the computation of pseudo-observations. The data can be used directly without passing through residuals, transformation of the data to pseudo-observations, and pairwise scatterplots.
pairs2(clean_rain, cex=0.6)
library(heatmaply)
heatmaply_cor(corKendall(clean_rain), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none")
Exchangeability test done on the lower triangular matrix (all elements of pairs2 below the matrix diagonal) high p-value for almost all (all exchangeable)
which(as.numeric(exchangeability_test_matrix[3,]) < 0.05)
## [1] 35 56 68 112 117 118 126 127 129
The low p-value for exchangeability is for the cases:
exchangeability_test_matrix[,which(as.numeric(exchangeability_test_matrix[3,]) < 0.05)]
## [,1] [,2] [,3]
## [1,] "Cervignano.del.Friuli" "Cividale.del.Friuli" "Codroipo"
## [2,] "Codroipo" "Palazzolo.dello.Stella" "Musi"
## [3,] "0.00949050949050949" "0.0134865134865135" "0.0014985014985015"
## [,4] [,5] [,6]
## [1,] "Gemona.del.Friuli" "Gemona.del.Friuli" "Gradisca.d.Is."
## [2,] "San.Vito.al.Tgl." "Vivaro" "Musi"
## [3,] "0.0024975024975025" "0.0154845154845155" "0.0164835164835165"
## [,7] [,8] [,9]
## [1,] "Musi" "Musi" "Musi"
## [2,] "Palazzolo.dello.Stella" "San.Vito.al.Tgl." "Talmassons"
## [3,] "0.0474525474525475" "0.0044955044955045" "0.0104895104895105"
Looking at data it seems that it is a problem of not enough data rather than non-exchangeability
More or less half of the pairs are radially symmetric
which(as.numeric(radial_test_matrix[3,]) < 0.05)
## [1] 3 4 5 6 7 8 10 11 14 16 19 21 24 25 26 27 30 31 38
## [20] 41 50 53 55 57 60 63 64 66 68 73 76 81 82 84 86 89 91 92
## [39] 93 95 96 104 105 111 112 114 116 117 122 126 127 129 131 138 141 142 144
## [58] 148 150 151 153
#radial_test_matrix[,which(as.numeric(radial_test_matrix[3,]) < 0.05)]
low p-value for all (no extreme value dependence)
which(as.numeric(extreme_value_test_matrix[3,]) > 0.05)
## integer(0)
diagonal independence, upper curve perfect positive dependence. On all there is strong lower tail dependence (“drought” periods are extendend to the whole area), not on all there is a strong upper tail dependence (concurrent extreme rainfall is rarer and occurrs in close stations)
library(VineCopula)
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
heatmaply_cor(lower_tdc, xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none")
Both heatmaps confirm what seen from Kendall distribution function
heatmaply_cor(upper_tdc, xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none")
The lower tdc is high and similar for all couples so it is not useful for clustering. The clustering will be done using the dissimilarity matrix of the upper tdc (-log(upper_tdc)) Different methods for the clustering are tested
dissimilarity_diff_upper<- (1-upper_tdc)^0.5
upper_clustering_diff_average<-hclust(d=as.dist(dissimilarity_diff_upper), method="average")
upper_clustering_diff_complete <- hclust(d=as.dist(dissimilarity_diff_upper), method = "complete")
plot(upper_clustering_diff_average, main="Average")
abline(h=0.91, col="blue")
plot(upper_clustering_diff_complete, main="Complete")
abline(h=0.91, col="blue")
With average there are 5 groups
knitr::include_graphics("mappa_stazioni_rain_cluster_complete_utdc.png")
cluster11<-clean_rain[,c("Enemonzo", "Tarvisio")]
cluster12<-clean_rain[,c("Brugnera", "Vivaro")]
cluster13<-clean_rain[,c("Fagagna", "Gemona.del.Friuli", "Musi")]
cluster14<-clean_rain[,c("Talmassons", "Cividale.del.Friuli", "Udine.S.O.", "Palazzolo.dello.Stella","San.Vito.al.Tgl.", "Codroipo")]
cluster15<-clean_rain[,c("Fossalon", "Sgonico.Zgonik", "Gradisca.d.Is.", "Capriva.del.Friuli", "Cervignano.del.Friuli")]
Scatterplots of clusters K-plot and Heatmap of tdc in clusters:
pairs2(cluster11, main="cluster11")
heatmaply_cor(fitLambda(cluster11, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster11")
pairwise<-combn(colnames(cluster11), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
heatmaply_cor(corKendall(cluster11), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster11")
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster12, main="cluster12")
heatmaply_cor(fitLambda(cluster12, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster12")
pairwise<-combn(colnames(cluster12), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
heatmaply_cor(corKendall(cluster12), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster12")
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster13, main="cluster13")
heatmaply_cor(fitLambda(cluster13, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster13")
pairwise<-combn(colnames(cluster13), 2) #this create all the combinations of the stations (no repetitions)
heatmaply_cor(corKendall(cluster13), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster13")
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster14, main="cluster14")
heatmaply_cor(fitLambda(cluster14, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster14")
pairwise<-combn(colnames(cluster14), 2) #this create all the combinations of the stations (no repetitions)
heatmaply_cor(corKendall(cluster14), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster14")
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster15, main="cluster15")
heatmaply_cor(fitLambda(cluster15, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster15")
pairwise<-combn(colnames(cluster15), 2) #this create all the combinations of the stations (no repetitions)
heatmaply_cor(corKendall(cluster15), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster15")
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
tau_matrix<-corKendall(clean_rain)
dissimilarity_diff_ken<- (2*(1-tau_matrix))^0.5
ken_clustering_diff_average<-hclust(d=as.dist(dissimilarity_diff_ken), method="average")
ken_clustering_diff_complete <- hclust(d=as.dist(dissimilarity_diff_ken), method = "complete")
plot(ken_clustering_diff_average, main="Average")
plot(ken_clustering_diff_complete, main="Complete")
abline(h=1.1, col="blue")
knitr::include_graphics("mappa_stazioni_rain_cluster_complete_Kendall.png")
cluster1<-clean_rain[,c("Enemonzo", "Tarvisio", "Gemona.del.Friuli", "Musi")]
cluster2<-clean_rain[,c("Talmassons", "Cividale.del.Friuli", "Udine.S.O.", "Palazzolo.dello.Stella","San.Vito.al.Tgl.", "Codroipo", "Cervignano.del.Friuli", "Brugnera", "Vivaro", "Fagagna")]
cluster3<-clean_rain[,c("Fossalon", "Sgonico.Zgonik", "Gradisca.d.Is.", "Capriva.del.Friuli")]
pairs2(cluster1, main="cluster1")
heatmaply_cor(fitLambda(cluster1, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster1")
pairwise<-combn(colnames(cluster1), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
heatmaply_cor(corKendall(cluster1), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster1")
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster2, main="cluster2")
heatmaply_cor(fitLambda(cluster2, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster2")
pairwise<-combn(colnames(cluster2), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
heatmaply_cor(corKendall(cluster2), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster2")
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster13, main="cluster3")
heatmaply_cor(fitLambda(cluster3, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster3")
pairwise<-combn(colnames(cluster3), 2) #this create all the combinations of the stations (no repetitions)
heatmaply_cor(corKendall(cluster3), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster3")
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
Function to compute AIC to select best family
AIC_cop<-function(cluster){
cop_name <- c('clayton','frank','gumbel','normal','normal_un','t','t_un','joe')
cop <- c(claytonCopula(dim=dim(cluster)[2]),gumbelCopula(dim=dim(cluster)[2]), frankCopula(dim=dim(cluster)[2]),
normalCopula(dim=dim(cluster)[2]), normalCopula(dim=dim(cluster)[2], dispstr="un"),
tCopula(dim = dim(cluster)[2], df.fixed=FALSE), tCopula(dim = dim(cluster)[2], dispstr="un", df.fixed=FALSE),
joeCopula(dim=dim(cluster)[2]))
fit.copula <- list()
for (i in 1:length(cop)){
tmp <- try(fitCopula(cop[[i]], cluster, method="mpl",optim.method ="BFGS"), silent=TRUE)
if (class(tmp)=="try-error"){
tmp <- try(fitCopula(cop[[i]], cluster, method="itau"), silent=TRUE)
cat(c("itau",i,"\n"))
}
fit.copula[[i]] <- tmp
}
#---- Summary of 3D copula model
fit.estimate <- vector()
fit.loglik <- vector()
aic.result <- vector()
for (i in 1:length(cop)){
fit.estimate[i] <- coef(fit.copula[[i]])[1]
fit.loglik[i] <- fit.copula[[i]]@loglik
aic.result[i] <- AIC(fit.copula[[i]])
}
#---- Best copula declaration 3D
WIN_AIC <- which.min(cbind(aic.result))
print(cop_name[WIN_AIC]);
fit.copula[[WIN_AIC]]
}
both exchangeable and radially symmetric
pairs2(cluster11, main="cluster11")
radSymTest(cluster11)$p.value
## [1] 0.2482517
exchTest(cluster11)$p.value
## [1] 0.2542458
N<-1000
t.copula_cluster11 <- tCopula(dim = dim(cluster11)[2], dispstr = "un", df.fixed = TRUE)
gofCopula(t.copula_cluster11, x=cluster11, simulation="mult")
##
## Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 2, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.018096, parameter = 0.7207, p-value = 0.3661
fit.normal <- fitCopula(normalCopula(dim = dim(cluster11)[2]),
data = cluster11, method = "mpl")
normal <- fit.normal@copula # fitted copula
gofCopula(normal, x = cluster11, N = N)
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
##
## Parametric bootstrap-based goodness-of-fit test of Normal copula, dim.
## d = 2, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.019067, parameter = 0.74328, p-value = 0.2902
AIC_cop(cluster11)
## Warning in var.mpl(copula, u): the covariance matrix of the parameter estimates
## is computed as if 'df.fixed = TRUE' with df = 13.8800995127842
## Warning in var.mpl(copula, u): the covariance matrix of the parameter estimates
## is computed as if 'df.fixed = TRUE' with df = 13.8800995127842
## [1] "normal"
## Call: fitCopula(cop[[i]], data = cluster, ... = pairlist(method = "mpl", optim.method = "BFGS"))
## Fit based on "maximum pseudo-likelihood" and 240 2-dimensional observations.
## Copula: normalCopula
## rho.1
## 0.7433
## The maximized loglikelihood is 93.16
## Optimization converged
both exchangeable and radially symmetric
pairs2(cluster12, main="cluster12")
radSymTest(cluster12)$p.value
## Warning in radSymTest(cluster12): argument 'ties' set to TRUE
## [1] 0.1263736
exchTest(cluster12)$p.value
## Warning in exchTest(cluster12): argument 'ties' set to TRUE
## [1] 0.6348651
t_un.copula_cluster12 <- tCopula(dim = dim(cluster12)[2], dispstr = "un", df.fixed = TRUE)
t.copula_cluster12 <- tCopula(dim = dim(cluster12)[2], df.fixed = TRUE)
gofCopula(t_un.copula_cluster12, x=cluster12, N=N, simulation="mult")
##
## Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 2, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.027536, parameter = 0.8245, p-value = 0.04645
gofCopula(t.copula_cluster12, x=cluster12, N=N, simulation="mult")
##
## Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 2, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.027535, parameter = 0.8245, p-value = 0.05944
AIC_cop(cluster12)
## Warning in var.mpl(copula, u): the covariance matrix of the parameter estimates
## is computed as if 'df.fixed = TRUE' with df = 2.61065507353008
## Warning in var.mpl(copula, u): the covariance matrix of the parameter estimates
## is computed as if 'df.fixed = TRUE' with df = 2.61065507353008
## [1] "t"
## Call: fitCopula(cop[[i]], data = cluster, ... = pairlist(method = "mpl", optim.method = "BFGS"))
## Fit based on "maximum pseudo-likelihood" and 240 2-dimensional observations.
## Copula: tCopula
## rho.1 df
## 0.8077 2.6107
## The maximized loglikelihood is 142.2
## Optimization converged
exchangeable but not radially symmetric
pairs2(cluster13, main="cluster13")
cloud2(cluster13, xlab = (colnames(cluster13)[1]), ylab = (colnames(cluster13)[2]), zlab = (colnames(cluster13)[3])) #cloudplot
radSymTest(cluster13)$p.value
## Warning in radSymTest(cluster13): argument 'ties' set to TRUE
## [1] 0.04445554
exchTest(cluster13)$p.value
## Warning in exchTest(cluster13): argument 'ties' set to TRUE
## [1] 0.9955045
#both exchangeable and radially symmetric
t.copula_cluster13 <- tCopula(dim = dim(cluster13)[2], dispstr = "un", df.fixed = TRUE)
gofCopula(t.copula_cluster13, x=cluster13, simulation="mult")
##
## Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 3, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.041543, parameter1 = 0.72476, parameter2 = 0.61707,
## parameter3 = 0.80042, p-value = 0.09241
AIC_cop(cluster13)
## Warning in var.mpl(copula, u): the covariance matrix of the parameter estimates
## is computed as if 'df.fixed = TRUE' with df = 5.91195377186503
## Warning in var.mpl(copula, u): the covariance matrix of the parameter estimates
## is computed as if 'df.fixed = TRUE' with df = 7.86933599724782
## [1] "t_un"
## Call: fitCopula(cop[[i]], data = cluster, ... = pairlist(method = "mpl", optim.method = "BFGS"))
## Fit based on "maximum pseudo-likelihood" and 240 3-dimensional observations.
## Copula: tCopula
## rho.1 rho.2 rho.3 df
## 0.7475 0.6420 0.8156 7.8693
## The maximized loglikelihood is 227
## Optimization converged
exchangeable and radially symmetric (lowish p-value)
pairs2(cluster14, main="cluster14")
radSymTest(cluster14)$p.value
## Warning in radSymTest(cluster14): argument 'ties' set to TRUE
## [1] 0.06243756
exchTest(cluster14)$p.value
## Warning in exchTest(cluster14): argument 'ties' set to TRUE
## [1] 0.9315684
t.copula_cluster14 <- tCopula(dim = dim(cluster14)[2], dispstr = "un", df.fixed = TRUE)
gofCopula(t.copula_cluster14, x=cluster14, N=N, simulation="mult")
##
## Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 6, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.04293, parameter1 = 0.75121, parameter2 = 0.82238,
## parameter3 = 0.85020, parameter4 = 0.77869, parameter5 = 0.81826,
## parameter6 = 0.85999, parameter7 = 0.65849, parameter8 = 0.67274,
## parameter9 = 0.76231, parameter10 = 0.71793, parameter11 = 0.75000,
## parameter12 = 0.84544, parameter13 = 0.75894, parameter14 = 0.76397,
## parameter15 = 0.84009, p-value = 0.5619
AIC_cop(cluster14)
## Warning in var.mpl(copula, u): the covariance matrix of the parameter estimates
## is computed as if 'df.fixed = TRUE' with df = 5.19700383053611
## Warning in var.mpl(copula, u): the covariance matrix of the parameter estimates
## is computed as if 'df.fixed = TRUE' with df = 5.00315432116554
## [1] "t_un"
## Call: fitCopula(cop[[i]], data = cluster, ... = pairlist(method = "mpl", optim.method = "BFGS"))
## Fit based on "maximum pseudo-likelihood" and 240 6-dimensional observations.
## Copula: tCopula
## rho.1 rho.2 rho.3 rho.4 rho.5 rho.6 rho.7 rho.8 rho.9 rho.10 rho.11
## 0.7621 0.8288 0.8560 0.7874 0.8257 0.8662 0.6743 0.6872 0.7738 0.7296 0.7600
## rho.12 rho.13 rho.14 rho.15 df
## 0.8518 0.7673 0.7733 0.8463 5.0032
## The maximized loglikelihood is 812.6
## Optimization converged
exchangeable (lowish p-value) but not radially symmetric. p-value on gof of t copula low but not under threshold…
pairs2(cluster15, main="cluster15")
radSymTest(cluster14)$p.value
## Warning in radSymTest(cluster14): argument 'ties' set to TRUE
## [1] 0.08141858
exchTest(cluster14)$p.value
## Warning in exchTest(cluster14): argument 'ties' set to TRUE
## [1] 0.9145854
t.copula_cluster15 <- tCopula(dim = dim(cluster15)[2], dispstr = "un", df.fixed = TRUE)
gofCopula(t.copula_cluster15, x=cluster15, N=N, simulation="mult")
##
## Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 5, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.076654, parameter1 = 0.68920, parameter2 = 0.76888,
## parameter3 = 0.70845, parameter4 = 0.76948, parameter5 = 0.71116,
## parameter6 = 0.66176, parameter7 = 0.60501, parameter8 = 0.87559,
## parameter9 = 0.84655, parameter10 = 0.77825, p-value = 0.08042
AIC_cop(cluster15)
## Warning in var.mpl(copula, u): the covariance matrix of the parameter estimates
## is computed as if 'df.fixed = TRUE' with df = 4.77512091879302
## Warning in var.mpl(copula, u): the covariance matrix of the parameter estimates
## is computed as if 'df.fixed = TRUE' with df = 4.90770529267044
## [1] "t_un"
## Call: fitCopula(cop[[i]], data = cluster, ... = pairlist(method = "mpl", optim.method = "BFGS"))
## Fit based on "maximum pseudo-likelihood" and 240 5-dimensional observations.
## Copula: tCopula
## rho.1 rho.2 rho.3 rho.4 rho.5 rho.6 rho.7 rho.8 rho.9 rho.10 df
## 0.6987 0.7766 0.7195 0.7766 0.7199 0.6731 0.6167 0.8790 0.8525 0.7864 4.9077
## The maximized loglikelihood is 561.9
## Optimization converged
The gof seems to not reject the t-copula hypothesis for any of the clusters except for cluster 12 in which the p-value oscillate around 0.045-0.051 based on the fit; maybe try cross validation instead of gof…